library(enrichR)
library(DT)
library(viridis)
library(ggVennDiagram)
library(patchwork)
library(tidyverse)
devtools::load_all(".")
pbmc_lupus <- readRDS("~/popuDE/reproducibility/batch_correction/pbmc_lupus_bc.rds")
pbmc_covid <- readRDS("~/popuDE/reproducibility/batch_correction/pbmc_covid_bc.rds")
plot.venn <- function(popuDE.res, baseline.res, p.sig, thre.type,
logFC.table=NULL, logFC.threshold=0.25,
limits=c(0, 3000)){
celltypes <- dimnames(baseline.res[[1]])[[2]]
subjects <- dimnames(baseline.res[[1]])[[3]]
pic.list <- list()
if (thre.type == 'fdr'){
thre.name <- names(baseline.res)[str_ends(names(baseline.res), 'fdr_info')]
}else if (thre.type == 'pval'){
thre.name <- names(baseline.res)[str_ends(names(baseline.res), 'pval_info')]
}else{
stop("Wrong 'thre.type' argument.")
}
bs.name <- str_split_i(thre.name, pattern = '\\.', i=1)
for (celltype in celltypes){
p.markers <- names(which(popuDE.res[[celltype]]$pp.d1 > 1-p.sig))
if (length(subjects) == 1){
w.markers <- names(which(baseline.res[[thre.name]][,celltype,] < p.sig))
}else{
w.markers <- names(which(rowSums(baseline.res[[thre.name]][,celltype,]< p.sig) > 0))
}
if (!is.null(logFC.table)){
fc.genes <- names(which(logFC.table[,celltype] > logFC.threshold))
p.markers <- intersect(p.markers, fc.genes)
w.markers <- intersect(w.markers, fc.genes)
}
to.plot.data <- list('Our method'=p.markers, bs.name=w.markers)
names(to.plot.data) <- c('Our method', bs.name)
pic <- ggVennDiagram(to.plot.data, label_alpha=0, label_color = "red", label_size = 4, edge_size = 1) +
scale_x_continuous(expand = expansion(mult = .2)) +
scale_fill_distiller(palette = "YlGnBu", direction = 1, limits = limits) +
labs(title=celltype) +
theme(text = element_text(size = 18),
plot.title=element_text(hjust=0.5, size=18, face='bold'))
pic.list[[celltype]] <- pic
}
p <- pic.list[[names(pic.list)[1]]]
for (celltype in names(pic.list)[-1]){
p <- p + pic.list[[celltype]]
}
p + plot_layout(ncol = 2) +
plot_annotation(title=str_glue("adj.p.threshold = {p.sig}"), theme = theme(plot.title = element_text(hjust=0.5, size = 20, face='bold')))
}
enrich.sig.genes <- function(popuDE.res, baseline.res, p.sig, is.popuDE.markers, is.baseline.markers){
celltypes <- dimnames(baseline.res[[1]])[[2]]
subjects <- dimnames(baseline.res[[1]])[[3]]
fdr.name <- names(baseline.res)[str_ends(names(baseline.res), 'fdr_info')]
bs.name <- str_split_i(fdr.name, pattern = '\\.', i=1)
res.list = list()
for (celltype in celltypes){
p.markers <- names(which(sort(popuDE.res[[celltype]]$pp.d1, decreasing = T) > 1-p.sig))
if (length(subjects) == 1){
w.markers <- names(which(sort(baseline.res[[fdr.name]][,celltype,]) < p.sig))
}else{
w.markers <- names(which(rowSums(baseline.res[[fdr.name]][,celltype,]< p.sig) > 0))
}
p.not.bs <- setdiff(p.markers, w.markers)
bs.not.p <- setdiff(w.markers, p.markers)
p.and.bs <- intersect(p.markers, w.markers)
number.cutoff <- min(length(bs.not.p), length(p.not.bs))
cat(celltype, "number of genes cut off:", number.cutoff, "\n")
if (is.popuDE.markers & is.baseline.markers){ # both p-markers and w-markers
to.enrich.genes <- p.and.bs
}else if (is.popuDE.markers & !is.baseline.markers){ # p-markers but not w-markers
if (length(subjects) == 1){
# for corrected lognormcounts as a whole,
# ensure the number of bs-markers == the number of p-markers
cat("Ensure the number of baseline markers == p-markers\n")
to.enrich.genes <- p.not.bs[1:number.cutoff]
}else{
to.enrich.genes <- p.not.bs
}
}else if (!is.popuDE.markers & is.baseline.markers){ # not p-markers but w-markers
if (length(subjects) == 1){
# for corrected lognormcounts as a whole,
# ensure the number of bs-markers == the number of p-markers
cat("Ensure the number of baseline markers == p-markers\n")
to.enrich.genes <- bs.not.p[1:number.cutoff]
}else{
to.enrich.genes <- bs.not.p
}
}else{
stop("is.popuDE.markers and is.baseline.markers can not both be FALSE.")
}
if (length(to.enrich.genes) > 0){
enriched <- enrichr(to.enrich.genes, c('Human_Gene_Atlas'))
celltype.res <- enriched$Human_Gene_Atlas[1:10,c(1:4)] %>%
format(scientific=T, digits=4) %>%
unite(!!sym(celltype), c('Term', 'Adjusted.P.value'), sep = '\n P.adj=') %>%
dplyr::select(!!sym(celltype))
res.list <- append(res.list, celltype.res)
cat("DE genes of ", celltype, "has been enriched.")
}
}
caption <- str_glue("is.our.method.markers = {is.popuDE.markers},
is.{bs.name}.markers = {is.baseline.markers},
adj.prob.threshold = {p.sig}")
res.list %>% bind_rows() %>%
datatable(extensions = 'Buttons',
caption = caption,
options = list(dom = 'Blfrtip',
buttons = c('copy', 'csv', 'excel', 'pdf', 'print'),
lengthMenu = list(c(10,25,50,-1),
c(10,25,50,"All"))))
}
load.or.run <- function(our.method, bs.method, dataset, sub.rep, ct.rep, per.subject){
norm.rep <- if (per.subject) NULL else {"scMerge_logcounts"}
# norm.rep <- ifelse(per.subject, NULL, "scMerge_logcounts")
count.type <- ifelse(per.subject, 'uncorrected_normcounts', 'corrected_lognormcounts')
save.path <- file.path('result', dataset, count.type)
if (!dir.exists(save.path)) dir.create(save.path)
bs.file.prefix <- ifelse(per.subject, 'per_subject.rds', 'as_whole.rds')
bs.res.file <- paste(bs.method, bs.file.prefix, sep = '_')
bs.res.path <- file.path(save.path, bs.res.file)
our.res.file <- paste0(our.method, '.rds')
our.res.path <- file.path(save.path, our.res.file)
if (bs.res.file %in% list.files(save.path)){bs.res <- readRDS(bs.res.path)}else{
bs.res <- runBaselineMethod(
get(dataset), per.subject=per.subject, numCores=20, celltype.rep=ct.rep,
subject.rep=sub.rep, method=bs.method, use.norm.rep=norm.rep,
log.input=!per.subject)
saveRDS(bs.res, bs.res.path)
}
if (our.res.file %in% list.files(save.path)){popuDE.res <- readRDS(our.res.path)}else{
popuDE.res <- popuDE(
get(dataset), effect_thres=0, tol=1e-5, numCores=20, celltype=ct.rep,
subject=sub.rep, use.norm.rep=norm.rep,
log.input=!per.subject)
saveRDS(popuDE.res, our.res.path)
}
return(list(our=popuDE.res, bs=bs.res))
}
res <- load.or.run('popuDE', 'wilcox', 'pbmc_lupus', 'subject', 'celltype', per.subject=TRUE)
plot.venn(res$our, res$bs, p.sig=0.05, thre.type='fdr', limits=c(0, 5500))
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=T, is.baseline.markers=F)
## B cells number of genes cut off: 38
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B cells has been enriched.CD14+ Monocytes number of genes cut off: 0
## CD4 T cells number of genes cut off: 151
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 T cells has been enriched.CD8 T cells number of genes cut off: 174
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 T cells has been enriched.Dendritic cells number of genes cut off: 0
## FCGR3A+ Monocytes number of genes cut off: 0
## NK cells number of genes cut off: 391
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK cells has been enriched.
res <- load.or.run('popuDE', 'wilcox', 'pbmc_lupus', 'subject', 'celltype', per.subject=FALSE)
plot.venn(res$our, res$bs, p.sig=0.05, thre.type='fdr', limits=c(0, 3500))
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=T, is.baseline.markers=F)
## B cells number of genes cut off: 443
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B cells has been enriched.CD14+ Monocytes number of genes cut off: 455
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD14+ Monocytes has been enriched.CD4 T cells number of genes cut off: 1310
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 T cells has been enriched.CD8 T cells number of genes cut off: 503
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 T cells has been enriched.Dendritic cells number of genes cut off: 36
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Dendritic cells has been enriched.FCGR3A+ Monocytes number of genes cut off: 198
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of FCGR3A+ Monocytes has been enriched.NK cells number of genes cut off: 1054
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK cells has been enriched.
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=F, is.baseline.markers=T)
## B cells number of genes cut off: 443
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B cells has been enriched.CD14+ Monocytes number of genes cut off: 455
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD14+ Monocytes has been enriched.CD4 T cells number of genes cut off: 1310
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 T cells has been enriched.CD8 T cells number of genes cut off: 503
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 T cells has been enriched.Dendritic cells number of genes cut off: 36
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Dendritic cells has been enriched.FCGR3A+ Monocytes number of genes cut off: 198
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of FCGR3A+ Monocytes has been enriched.NK cells number of genes cut off: 1054
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK cells has been enriched.
res <- load.or.run('popuDE', 'wilcox', 'pbmc_covid', 'sampleID', 'majorType', per.subject=TRUE)
plot.venn(res$our, res$bs, p.sig=0.05, thre.type='fdr', limits=c(0, 8000))
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=T, is.baseline.markers=F)
## B number of genes cut off: 477
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B has been enriched.CD4 number of genes cut off: 193
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 has been enriched.CD8 number of genes cut off: 1115
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 has been enriched.DC number of genes cut off: 2
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of DC has been enriched.Mega number of genes cut off: 35
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mega has been enriched.Mono number of genes cut off: 1
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mono has been enriched.NK number of genes cut off: 862
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK has been enriched.Plasma number of genes cut off: 0
res <- load.or.run('popuDE', 'wilcox', 'pbmc_covid', 'sampleID', 'majorType', per.subject=FALSE)
plot.venn(res$our, res$bs, p.sig=0.05, thre.type='fdr', limits=c(0, 6000))
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=T, is.baseline.markers=F)
## B number of genes cut off: 1613
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B has been enriched.CD4 number of genes cut off: 2314
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 has been enriched.CD8 number of genes cut off: 1787
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 has been enriched.DC number of genes cut off: 119
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of DC has been enriched.Mega number of genes cut off: 627
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mega has been enriched.Mono number of genes cut off: 1538
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mono has been enriched.NK number of genes cut off: 1953
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK has been enriched.Plasma number of genes cut off: 197
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Plasma has been enriched.
enrich.sig.genes(res$our, res$bs, p.sig=0.05, is.popuDE.markers=F, is.baseline.markers=T)
## B number of genes cut off: 1613
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of B has been enriched.CD4 number of genes cut off: 2314
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD4 has been enriched.CD8 number of genes cut off: 1787
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of CD8 has been enriched.DC number of genes cut off: 119
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of DC has been enriched.Mega number of genes cut off: 627
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mega has been enriched.Mono number of genes cut off: 1538
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Mono has been enriched.NK number of genes cut off: 1953
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of NK has been enriched.Plasma number of genes cut off: 197
## Ensure the number of baseline markers == p-markers
## Uploading data to Enrichr... Done.
## Querying Human_Gene_Atlas... Done.
## Parsing results... Done.
## DE genes of Plasma has been enriched.